package org.carwler.main;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.carwler.pojo.HouseInfo;
import org.carwler.util.ConnectionUtil;

/**
 * Downloads one page of second-hand house listings from bj.lianjia.com and
 * pulls the fields of each listing out of the raw HTML with regular
 * expressions.
 *
 * <p>The title pass creates one {@link HouseInfo} per listing; every later
 * pass pairs its n-th match with the n-th entry of that list, so the fields
 * are associated purely by their order of appearance in the page.
 *
 * <p>Every block-level expression uses a reluctant quantifier ({@code .+?}).
 * A greedy {@code .+} would run to the <em>last</em> closing tag on the line
 * and swallow several listings in a single match; the {@code ?} makes the
 * match stop at the first closing tag instead.
 */
public class Analyze {

	/** One listing's title block: the title div up to its closing tag. */
	private static final Pattern TITLE_BLOCK = Pattern
			.compile("<div class=\"title\">\\s{1,}<a(.+?)</div>");

	/** Anchor text inside a title block; starts with a character in U+0391..U+FFE5. */
	private static final Pattern TITLE_TEXT = Pattern
			.compile(">[\u0391-\uFFE5]+.+?</a>");

	/** First URL-like token inside a title block (scheme is optional). */
	private static final Pattern LINK = Pattern
			.compile("((http|https)://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?");

	/** One listing's houseInfo div. */
	private static final Pattern HOUSE_INFO_BLOCK = Pattern
			.compile("<div class=\"houseInfo\">(.+?)</div>");

	/** Text portion of a houseInfo div, from the first U+0391..U+FFE5 run to the closing tag. */
	private static final Pattern HOUSE_INFO_TEXT = Pattern
			.compile(">[\u0391-\uFFE5]+(.+?)</div>");

	/** One listing's positionInfo div, up to the first following anchor tag. */
	private static final Pattern FLOOR_BLOCK = Pattern
			.compile("<div class=\"positionInfo\">\\s{1,}(.+?)<a");

	/** Text portion of a positionInfo block, up to the first following anchor tag. */
	private static final Pattern FLOOR_TEXT = Pattern
			.compile(">[\u0391-\uFFE5]+(.+?)<a");

	/** One listing's totalPrice div. */
	private static final Pattern TOTAL_PRICE_BLOCK = Pattern
			.compile("<div class=\"totalPrice\">.+?</div>");

	/** The span-plus-unit part of a totalPrice div. */
	private static final Pattern TOTAL_PRICE_TEXT = Pattern
			.compile("<span>.+</div>");

	/** A span whose text starts with the literal unit-price label. */
	private static final Pattern UNIT_PRICE = Pattern
			.compile("<span>单价.+?</span>");

	/**
	 * Fetches listing page {@code page} and parses every listing found on it.
	 *
	 * @param page page number appended to the listing URL
	 * @return the parsed listings in page order; empty when the page body is
	 *         null or contains no title block (never {@code null})
	 */
	public List<HouseInfo> regexMain(int page) {
		String address = "http://bj.lianjia.com/ershoufang/pg" + page + "/";
		String result = ConnectionUtil.Connect(address);
		List<HouseInfo> houseList = new ArrayList<HouseInfo>();
		// NOTE(review): whether Connect can return null is not visible here;
		// guard anyway because Pattern.matcher(null) throws.
		if (result == null) {
			return houseList;
		}
		regexTitle(houseList, result);
		// Nothing to attach the remaining fields to when no title was found.
		if (!houseList.isEmpty()) {
			regexHouseInfo(houseList, result);
			regexFloorInfo(houseList, result);
			regexTotalPrice(houseList, result);
			regexUnitPrice(houseList, result);
		}
		return houseList;
	}

	/**
	 * Appends one {@link HouseInfo} to {@code houseList} for every title block
	 * in {@code targetStr}, setting its title and link when they can be found.
	 *
	 * @param houseList list that receives the new entries
	 * @param targetStr raw HTML of the listing page
	 */
	private void regexTitle(List<HouseInfo> houseList, String targetStr) {
		Matcher blocks = TITLE_BLOCK.matcher(targetStr);
		while (blocks.find()) {
			String block = blocks.group();
			HouseInfo info = new HouseInfo();

			// Set the title: drop the leading '>' and the trailing "</a>".
			Matcher title = TITLE_TEXT.matcher(block);
			if (title.find()) {
				String matched = title.group();
				info.setTitle(matched.substring(1, matched.length() - 4));
			}

			// Set the external link: first URL-like token in the block.
			Matcher link = LINK.matcher(block);
			if (link.find()) {
				info.setLink(link.group());
			}

			// Added even when nothing matched so positions stay aligned with
			// the later passes.
			houseList.add(info);
		}
	}

	/**
	 * Sets the info field of the i-th entry from the i-th houseInfo div.
	 * Matches beyond the end of {@code houseList} are ignored.
	 *
	 * @param houseList entries created by the title pass
	 * @param targetStr raw HTML of the listing page
	 */
	private void regexHouseInfo(List<HouseInfo> houseList, String targetStr) {
		Matcher blocks = HOUSE_INFO_BLOCK.matcher(targetStr);
		// Bounded by the list size: an extra match must not index past the end.
		for (int i = 0; i < houseList.size() && blocks.find(); ++i) {
			Matcher text = HOUSE_INFO_TEXT.matcher(blocks.group());
			if (text.find()) {
				String matched = text.group();
				// Drop the leading '>' and the trailing "</div>", then any
				// closing anchor tags left inside the text.
				String info = matched.substring(1, matched.length() - 6)
						.replace("</a>", "");
				houseList.get(i).setInfo(info);
			}
		}
	}

	/**
	 * Sets the floor field of the i-th entry from the i-th positionInfo div.
	 * Matches beyond the end of {@code houseList} are ignored.
	 *
	 * @param houseList entries created by the title pass
	 * @param targetStr raw HTML of the listing page
	 */
	private void regexFloorInfo(List<HouseInfo> houseList, String targetStr) {
		Matcher blocks = FLOOR_BLOCK.matcher(targetStr);
		for (int i = 0; i < houseList.size() && blocks.find(); ++i) {
			Matcher text = FLOOR_TEXT.matcher(blocks.group());
			if (text.find()) {
				String matched = text.group();
				// Drop the leading '>' and the last five characters (the "<a"
				// plus the three characters before it). The shortest possible
				// match is exactly five characters long, which would give an
				// inverted range, so the end index is clamped.
				int end = Math.max(1, matched.length() - 5);
				houseList.get(i).setFloor(matched.substring(1, end));
			}
		}
	}

	/**
	 * Sets the total-price field of the i-th entry from the i-th totalPrice
	 * div. Matches beyond the end of {@code houseList} are ignored.
	 *
	 * @param houseList entries created by the title pass
	 * @param targetStr raw HTML of the listing page
	 */
	private void regexTotalPrice(List<HouseInfo> houseList, String targetStr) {
		Matcher blocks = TOTAL_PRICE_BLOCK.matcher(targetStr);
		for (int i = 0; i < houseList.size() && blocks.find(); ++i) {
			Matcher text = TOTAL_PRICE_TEXT.matcher(blocks.group());
			if (text.find()) {
				String matched = text.group();
				// Drop the leading "<span>" and the trailing "</div>", then
				// the "</span>" that separates the number from its unit.
				String totalPrice = matched.substring(6, matched.length() - 6)
						.replace("</span>", "");
				houseList.get(i).setTotalPrice(totalPrice);
			}
		}
	}

	/**
	 * Sets the unit-price field of the i-th entry from the i-th unit-price
	 * span. Matches beyond the end of {@code houseList} are ignored.
	 *
	 * @param houseList entries created by the title pass
	 * @param targetStr raw HTML of the listing page
	 */
	private void regexUnitPrice(List<HouseInfo> houseList, String targetStr) {
		Matcher spans = UNIT_PRICE.matcher(targetStr);
		for (int i = 0; i < houseList.size() && spans.find(); ++i) {
			String matched = spans.group();
			// Drop the leading "<span>" and the trailing "</span>".
			String unitPrice = matched.substring(6, matched.length() - 7);
			houseList.get(i).setUnitPrice(unitPrice);
		}
	}
}
